* Load the pooled Israeli census extract.
clear all
use "$path\Raw_Data\ipumsi_israel.dta"

* Native indicator: bplcountry code 34070 is labelled here as born in Israel.
gen byte native = bplcountry == 34070
label var native "Born in Israel"

*adjust for pooling census years
* strata identifiers are made unique across census years by grouping on (year, strata)
replace strata = 1 if year==2008 //this year was unstratified
egen new_strata = group(year strata)

* Sampling rate assigned to each census year.
gen sampling_rate = 0.07 if year == 2008
	replace sampling_rate = 0.1 if year == 1995
	replace sampling_rate = 0.1 if year == 1983
	replace sampling_rate = 0.1 if year == 1972

gen perwt_main = perwt

*for main analysis
* Rescale person weights by each census year's share of pooled observations:
*   perwt_main = perwt * (obs in year) / (total obs)
count
local tot_obs = r(N)
levelsof year, local(years)
foreach y of local years {
	count if year == `y'
	* store the count immediately so later commands cannot clobber r(N)
	local n_year = r(N)
	* r(N) inside a quoted string is printed literally; use the stored count instead
	display "year `y' has `n_year' obs"
	replace perwt_main = perwt * `n_year' / `tot_obs' if year == `y'
}


*****************************
* Country of birth, harmonised to the il2008a_bplcntry coding scheme.
* Start from an empty variable and fill it from each census year's source variable.
gen birth_country = .
replace birth_country = il2008a_bplcntry if year == 2008
foreach yr in 1995 1983 1972 {
	replace birth_country = il`yr'a_bpl if year == `yr'
}

* Map the 1972 and 1983 codes onto the 2008 code list.
recode birth_country ///
	(2 = 225) (5 = 50) (7 = 70) (9 = 90) (11 = 960) (17 = 15) ///
	(18 = 2) (20 = 200) (21 = 220) (23 = 970) (24 = 240) (28 = 3) ///
	(30 = 306) (31 = 310) (40 = 400) (42 = 980) (50 = 981) ///
	(53 54 = 982) (68 = 4) (71 = 8) (85 = 7) (90 99 = 999) ///
	if inlist(year, 1972, 1983)

* Origin-group indicators (0/1) defined from the harmonised codes.
gen byte Sat = inlist(birth_country, 310, 400, 980, 982)
gen byte Sov = inlist(birth_country, 1, 5, 304, 305, 306, 308, 315)
gen byte west = inlist(birth_country, 4, 640, 983)
* EB: member of either the Sov or the Sat group
gen byte EB = Sov | Sat
* Imm: member of any of the three groups above
gen Imm = west | EB

***************************************************************************************
* Demographics

rename yrimm2 immigration_year

* immigration_year holds category codes (1-99, plus 998). Each code is mapped to an
* approximate calendar year. The list below is positional: the k-th entry is the
* value assigned to code k. Codes not listed keep the initial value 0.
local midpoints ///
	1887   1892   1897   1902   1908.5 1907   1912   1917   1917   1931   ///
	1922   1927   1932   1930   1935.5 1937   1942.5 1942   1945.5 1945   ///
	1944.5 1947   1953   1950.5 1953   1950   1947.5 1951.5 1956   1949.5 ///
	1950   1952   1954.5 1955.5 1956   1957   1957.5 1959.5 1959   1960   ///
	1962   1962   1962.5 1965.5 1966   1968   1964.5 1965   1967   1968   ///
	1965   1966   1967   1968   1967   1968   1969   1969.5 1970.5 1971   ///
	1972   1970   1975.5 1971   1973   1975.5 1977.5 1977   1977.5 1978   ///
	1980.5 1984.5 1983   1983.5 1985.5 1983   1985.5 1985.5 1986   1988.5 ///
	1987   1988.5 1988   1989   1990.5 1994   1992.5 1993   1993.5 1995.5 ///
	1993.5 2000   1996.5 1999   2000   2002.5 2005   2003.5 2006

gen immigration_year_approx = 0
local code = 0
foreach mid of local midpoints {
	local ++code
	replace immigration_year_approx = `mid' if immigration_year == `code'
}
* code 998 is set to missing
replace immigration_year_approx = . if immigration_year == 998

label var immigration_year_approx "midpoint of year of immigration range"

* Age at immigration: current age minus elapsed time since the approximate arrival year.
* Stored as int; negative results are floored at 0 and natives get the sentinel 900.
gen int immigration_age = age - year + immigration_year_approx

replace immigration_age = 0 if immigration_age < 0
replace immigration_age = 900 if native == 1
label var immigration_age "best estimate of age at immigration"
note immigration_age: Natives are coded as 900

*** Approximate years since arrival (natives get the sentinel 900)

gen years_in_israel = cond(native == 1, 900, year - immigration_year_approx)
label var years_in_israel "Years in Israel"

* 0/1 indicators built from sex and marst; the source variables are then dropped
gen byte female = (sex == 2)
drop sex

gen byte married = (marst == 2)
drop marst

***************************************************************************************
* outcome variables

*** employment status: 1 when classwk is 1, 2 or 3
gen employed = inlist(classwk, 1, 2, 3)

* occupation skill categories
* match Germany coding: ISCO-08 1, 2, 3 = high, 4-8 = medium
* The occ code lists differ by census year, so each year is filled in separately.
gen high_occ = .
gen high_med_occ = .

* 1972 (high includes admin, clerical workers...)
replace high_occ     = inlist(occ, 0, 1)             if year == 1972
replace high_med_occ = inlist(occ, 0, 1, 2, 4, 6, 7) if year == 1972

* 1983 (high/medium excl farm workers)
replace high_occ     = inlist(occ, 1, 2, 3)                if year == 1983
replace high_med_occ = inlist(occ, 1, 2, 3, 4, 5, 6, 8)    if year == 1983

* 1995 (high/medium incl skilled farm workers)
replace high_occ     = inlist(occ, 1, 2, 3)             if year == 1995
replace high_med_occ = inlist(occ, 1, 2, 3, 4, 5, 6)    if year == 1995

* 2008
replace high_occ     = inlist(occ, 0, 1, 2)                if year == 2008
replace high_med_occ = inlist(occ, 0, 1, 2, 3, 4, 5, 6)    if year == 2008


***  income - inflation data from https://www.inflationtool.com/israeli-new-shekel
* Step 1: zero out values above each year's topcode (values above topcode are NIU)
*https://international.ipums.org/international-action/variables/INCEARN#codes_section
replace incearn = 0 if year == 1972 & incearn > 16000
replace incearn = 0 if year == 2008 & incearn > 275653

replace incwage = 0 if year == 1983 & incwage > 74716
replace incwage = 0 if year == 1995 & incwage > 20000

replace incself = 0 if year == 1995 & incself > 40000

* Step 2: convert to annual USD https://fxtop.com/en/historical-exchange-rates.php
* israeli pound until 1980, 1 shekel = 10 pounds
* shekel until 1985, 1000 old shekels = 1 new shekel
* (1983 and 1995 amounts are multiplied by 12)
replace incearn = incearn * 1 / 10 / 1000 / 0.000418 if year == 1972
replace incearn = incearn * 1 / 3.679348 if year == 2008
replace incwage = incwage * 12 / 1000 / 0.057093 if year == 1983
replace incwage = incwage * 12 / 2.965342 if year == 1995
replace incself = incself * 12 / 2.965342 if year == 1995

* Step 3: pick the income measure used for each census year
gen income = 0
replace income = incearn if inlist(year, 1972, 2008)
replace income = incwage if year == 1983
replace income = incwage + incself if year == 1995

* Step 4: convert to 1999 USD https://www.usinflationcalculator.com/
local deflator_1972 3.99
local deflator_1983 1.67
local deflator_1995 1.09
local deflator_2008 0.77
foreach yr in 1972 1983 1995 2008 {
	replace income = income * `deflator_`yr'' if year == `yr'
}

label var income "total personal income excl. welfare and social sec."

* natural log; zero income yields missing
gen log_income = ln(income)
label var log_income "log earned income"

** education -- ISCED97 recode for consistency across countries
* edattaind code -> isced97 value (anything else stays 0):
*   212 -> 2  (1A -- grades 6)
*   311 -> 6  (3A secondary)
*   322 -> 7  (4 post-secondary technical degree)
*   400 -> 9  (5A college and advanced degrees)
generate isced97 = cond(edattaind == 212, 2, ///
	cond(edattaind == 311, 6, ///
	cond(edattaind == 322, 7, ///
	cond(edattaind == 400, 9, 0))))

*collapse isced97 for Table B4: "at least this level" indicators
local floor_5a 9
local floor_5b 8
local floor_4  7
local floor_3  6
foreach lvl in 5a 5b 4 3 {
	gen isced97_`lvl' = (isced97 >= `floor_`lvl'') & !missing(isced97)
}

*collapse isced97 for Fig 3
* 1 = lower secondary or less, 2 = upper secondary, 3 = post-secondary, 4 = tertiary
gen isced_int = cond(inrange(isced97, 0, 4), 1, ///
	cond(inrange(isced97, 5, 6), 2, ///
	cond(isced97 == 7, 3, ///
	cond(inrange(isced97, 8, 10), 4, .))))

*russia version
* 1 = lower secondary or less, 2 = upper secondary,
* 3 = tertiary incl. post-secondary (not separately categorized in russia census)
gen isced_rus = cond(inrange(isced97, 0, 4), 1, ///
	cond(inrange(isced97, 5, 6), 2, ///
	cond(inrange(isced97, 7, 10), 3, .)))

* Years of education. Positional lists: the i-th edattaind code gets the i-th value.
*   120 some primary, 212 primary, 221 lower secondary,
*   311 secondary, 312 some college, 322 post secondary technical
gen edu_years = 0
local attain_codes 120 212 221 311 312 322
local attain_years 3 6 9 12 13 14
forvalues i = 1/6 {
	local code : word `i' of `attain_codes'
	local yrs : word `i' of `attain_years'
	replace edu_years = `yrs' if edattaind == `code'
}
* degree codes from educil are applied last and override the values above
replace edu_years = 15 if educil == 45 // BA
replace edu_years = 17 if educil == 46 // MA

label var edu_years "years of education"

** Sample selection
* sample: analysis sample restricted to ages 25-65.
* sample_allages: same origin/arrival-year restrictions without the age filters.
* Both are set to missing when bplcountry == 0.
*
* NOTE: immigration_year holds the category code (1-99, 998) that this script maps
* to immigration_year_approx, so the calendar-year cutoffs must be applied to
* immigration_year_approx. Comparing the code itself with 2003 could only match
* missing codes, i.e. the "arrived after 2003" exclusion never took effect.
gen sample = (age >= 25 & age <= 65)
	replace sample = 0 if native == 0 & Imm == 0 // include only natives, Western migrants, and CB migrants
	replace sample = 0 if Imm == 1 & immigration_age <=24 //exclude those arriving before age 25
	replace sample = 0 if Imm == 1 & (immigration_year_approx < 1962) // exclude those who came before 1962, will be some interval error but not much
	replace sample = 0 if Imm == 1 & (immigration_year_approx > 2003 & !missing(immigration_year_approx)) // exclude those who came after 2003, when many CB countries join EU
	replace sample = 0 if Imm == 1 & (immigration_year_approx == . | immigration_age == .) //can't use these in analysis
	replace sample = . if bplcountry == 0 //in 1983, didn't record birthplace for non-Jews
	
gen sample_allages = 1
	replace sample_allages = 0 if native == 0 & Imm == 0 // include only natives, Western migrants, and CB migrants
	replace sample_allages = 0 if Imm == 1 & (immigration_year_approx < 1962) // exclude those who came before 1962
	replace sample_allages = 0 if Imm == 1 & (immigration_year_approx > 2003 & !missing(immigration_year_approx)) // exclude those who came after 2003, when many CB countries join EU
	replace sample_allages = 0 if Imm == 1 & (immigration_year_approx == . | immigration_age == .) //can't use these in analysis
	replace sample_allages = . if bplcountry == 0 //in 1983, didn't record birthplace for non-Jews

	
******* generate variables necessary for regressions. note values 86-90 are not observed in the data, so the cuts at 1990 and 1992 are clean.
* post_90 / post_92: 1 when the approximate arrival year is at or after the cutoff.
* The explicit missing check is needed because missing compares as larger than any number.
gen post_90 = immigration_year_approx>=1990 & immigration_year_approx != .
gen post_92 = immigration_year_approx>=1992 & immigration_year_approx != .

* Interact each origin-group indicator with each post-period indicator,
* creating e.g. Imm_post_90, Imm_post_92, EB_post_90, ...
* (an unused local holding the variable label was removed from this loop)
foreach var in Imm EB Sov Sat {
	foreach stub in post_90 post_92 {
		gen `var'_`stub' = `var' * `stub'
	}
}


*bins for control vars
* age in 5-yr bins, labelled by the lower bound of the bin
gen age_bin = 5 * floor(age/5)

* years in Israel: natives carry the sentinel 900; round to whole years
* because the variable is used as FE, not as a linear control
replace years_in_israel = 900 if native == 1
replace years_in_israel = round(years_in_israel)

* 1-yr bins for the first 5 years, 5-yr bins afterwards
gen years_in_israel_bin = cond(years_in_israel <= 5, years_in_israel, 5 * floor(years_in_israel/5))

* shrink storage types and write the prepared file
compress
save "$path\Tempfiles\IsraeliCensus_vars.dta", replace
